import pandas as pd
from datetime import datetime
import numpy as np
import chart_studio.plotly as py
import seaborn as sns
import plotly.express as px
import cufflinks as cf
%matplotlib inline
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected = True)
cf.go_offline()
import warnings
warnings.filterwarnings('ignore')
The data is downloaded from https://covid.ourworldindata.org/data
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'

# The 'date' column is ISO formatted (YYYY-MM-DD), which pandas parses
# natively.  The former custom parser relied on the `pd.datetime` alias
# (removed in pandas 2.0) and on the `date_parser` argument (deprecated in
# pandas 2.0), so neither is used any more.
df = pd.read_csv(url, parse_dates=['date'])

# Keep records from 2020-03-04 onwards.  The explicit copy makes the result
# an independent frame, so the columns added to it later are not written
# to a slice of the raw data.
df = df[df['date'] >= '2020-03-04'].copy()
Adding Year, Month and Day Columns
# Calendar features derived from the parsed 'date' column.  The .dt accessor
# computes each one for the whole column at once instead of looping over
# every row in Python.
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['day_name'] = df['date'].dt.day_name()

# Preparing the number of weeks of the pandemic.
# Week 1 is the Monday-based week that contains 2020-03-04 (it starts on
# 2020-03-02).  Counting whole weeks from that Monday replaces the former
# ISO-week offsets (-9 for 2020, +45 for 2021), which jumped from 44 to 46
# at the turn of the year and gave wrong numbers for any later year.
first_week_monday = pd.Timestamp('2020-03-02')
df['week_nr'] = (df['date'] - first_week_monday).dt.days // 7 + 1

df['month_name'] = df['date'].dt.strftime('%b')

# Deaths as a percentage of confirmed cases, rounded to two decimals.
df['fatality'] = (df['total_deaths'] / df['total_cases'] * 100).round(2)

# Missing values become 0 in every column (including text columns).
df = df.replace(np.nan, 0)
Adding the percentage of people infected, vaccinated with at least one dose, and fully vaccinated
# Shares of the population in percent: infections keep two decimals, both
# vaccination shares are rounded to whole numbers.
population = df['population']
df['infected'] = df['total_cases'].div(population).mul(100).round(2)
df['vacinated'] = df['people_vaccinated'].div(population).mul(100).round()
df['vacinated_fully'] = (
    df['people_fully_vaccinated'].div(population).mul(100).round()
)
df
| iso_code | continent | location | date | total_cases | new_cases | new_cases_smoothed | total_deaths | new_deaths | new_deaths_smoothed | total_cases_per_million | new_cases_per_million | new_cases_smoothed_per_million | total_deaths_per_million | new_deaths_per_million | new_deaths_smoothed_per_million | reproduction_rate | icu_patients | icu_patients_per_million | hosp_patients | hosp_patients_per_million | weekly_icu_admissions | weekly_icu_admissions_per_million | weekly_hosp_admissions | weekly_hosp_admissions_per_million | new_tests | total_tests | total_tests_per_thousand | new_tests_per_thousand | new_tests_smoothed | new_tests_smoothed_per_thousand | positive_rate | tests_per_case | tests_units | total_vaccinations | people_vaccinated | people_fully_vaccinated | total_boosters | new_vaccinations | new_vaccinations_smoothed | total_vaccinations_per_hundred | people_vaccinated_per_hundred | people_fully_vaccinated_per_hundred | total_boosters_per_hundred | new_vaccinations_smoothed_per_million | stringency_index | population | population_density | median_age | aged_65_older | aged_70_older | gdp_per_capita | extreme_poverty | cardiovasc_death_rate | diabetes_prevalence | female_smokers | male_smokers | handwashing_facilities | hospital_beds_per_thousand | life_expectancy | human_development_index | excess_mortality | year | month | day | day_name | week_nr | month_name | fatality | infected | vacinated | vacinated_fully | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 9 | AFG | Asia | Afghanistan | 2020-03-04 | 5.0 | 0.0 | 0.000 | 0.0 | 0.0 | 0.000 | 0.126 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.000 | 0.0 | 0.000 | 0.000 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 27.78 | 39835428.0 | 54.422 | 18.6 | 2.581 | 1.337 | 1803.987 | 0.0 | 597.029 | 9.59 | 0.0 | 0.0 | 37.746 | 0.5 | 64.83 | 0.511 | 0.0 | 2020 | 3 | 4 | Wednesday | 1 | Mar | 0.00 | 0.00 | 0.0 | 0.0 |
| 10 | AFG | Asia | Afghanistan | 2020-03-05 | 5.0 | 0.0 | 0.000 | 0.0 | 0.0 | 0.000 | 0.126 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.000 | 0.0 | 0.000 | 0.000 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 27.78 | 39835428.0 | 54.422 | 18.6 | 2.581 | 1.337 | 1803.987 | 0.0 | 597.029 | 9.59 | 0.0 | 0.0 | 37.746 | 0.5 | 64.83 | 0.511 | 0.0 | 2020 | 3 | 5 | Thursday | 1 | Mar | 0.00 | 0.00 | 0.0 | 0.0 |
| 11 | AFG | Asia | Afghanistan | 2020-03-06 | 5.0 | 0.0 | 0.000 | 0.0 | 0.0 | 0.000 | 0.126 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.000 | 0.0 | 0.000 | 0.000 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 27.78 | 39835428.0 | 54.422 | 18.6 | 2.581 | 1.337 | 1803.987 | 0.0 | 597.029 | 9.59 | 0.0 | 0.0 | 37.746 | 0.5 | 64.83 | 0.511 | 0.0 | 2020 | 3 | 6 | Friday | 1 | Mar | 0.00 | 0.00 | 0.0 | 0.0 |
| 12 | AFG | Asia | Afghanistan | 2020-03-07 | 8.0 | 3.0 | 0.429 | 0.0 | 0.0 | 0.000 | 0.201 | 0.075 | 0.011 | 0.000 | 0.000 | 0.000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.000 | 0.0 | 0.000 | 0.000 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 27.78 | 39835428.0 | 54.422 | 18.6 | 2.581 | 1.337 | 1803.987 | 0.0 | 597.029 | 9.59 | 0.0 | 0.0 | 37.746 | 0.5 | 64.83 | 0.511 | 0.0 | 2020 | 3 | 7 | Saturday | 1 | Mar | 0.00 | 0.00 | 0.0 | 0.0 |
| 13 | AFG | Asia | Afghanistan | 2020-03-08 | 8.0 | 0.0 | 0.429 | 0.0 | 0.0 | 0.000 | 0.201 | 0.000 | 0.011 | 0.000 | 0.000 | 0.000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.000 | 0.0 | 0.000 | 0.000 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 27.78 | 39835428.0 | 54.422 | 18.6 | 2.581 | 1.337 | 1803.987 | 0.0 | 597.029 | 9.59 | 0.0 | 0.0 | 37.746 | 0.5 | 64.83 | 0.511 | 0.0 | 2020 | 3 | 8 | Sunday | 1 | Mar | 0.00 | 0.00 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 116762 | ZWE | Africa | Zimbabwe | 2021-09-10 | 126163.0 | 107.0 | 118.857 | 4532.0 | 11.0 | 10.714 | 8359.500 | 7.090 | 7.875 | 300.288 | 0.729 | 0.710 | 0.59 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2411.0 | 1180285.0 | 78.205 | 0.160 | 4185.0 | 0.277 | 0.028 | 35.2 | tests performed | 4656448.0 | 2824296.0 | 1832152.0 | 0.0 | 54428.0 | 40605.0 | 30.85 | 18.71 | 12.14 | 0.0 | 2690.0 | 0.00 | 15092171.0 | 42.729 | 19.6 | 2.822 | 1.882 | 1899.775 | 21.4 | 307.846 | 1.82 | 1.6 | 30.7 | 36.791 | 1.7 | 61.49 | 0.571 | 0.0 | 2021 | 9 | 10 | Friday | 81 | Sep | 3.59 | 0.84 | 19.0 | 12.0 |
| 116763 | ZWE | Africa | Zimbabwe | 2021-09-11 | 126220.0 | 57.0 | 113.571 | 4536.0 | 4.0 | 10.000 | 8363.277 | 3.777 | 7.525 | 300.553 | 0.265 | 0.663 | 0.60 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2166.0 | 1182451.0 | 78.349 | 0.144 | 4108.0 | 0.272 | 0.028 | 36.2 | tests performed | 4708905.0 | 2844848.0 | 1864057.0 | 0.0 | 52457.0 | 44094.0 | 31.20 | 18.85 | 12.35 | 0.0 | 2922.0 | 0.00 | 15092171.0 | 42.729 | 19.6 | 2.822 | 1.882 | 1899.775 | 21.4 | 307.846 | 1.82 | 1.6 | 30.7 | 36.791 | 1.7 | 61.49 | 0.571 | 0.0 | 2021 | 9 | 11 | Saturday | 81 | Sep | 3.59 | 0.84 | 19.0 | 12.0 |
| 116764 | ZWE | Africa | Zimbabwe | 2021-09-12 | 126269.0 | 49.0 | 102.714 | 4538.0 | 2.0 | 8.000 | 8366.523 | 3.247 | 6.806 | 300.686 | 0.133 | 0.530 | 0.60 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2035.0 | 1184486.0 | 78.483 | 0.135 | 3978.0 | 0.264 | 0.026 | 38.7 | tests performed | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 42719.0 | 0.00 | 0.00 | 0.00 | 0.0 | 2831.0 | 0.00 | 15092171.0 | 42.729 | 19.6 | 2.822 | 1.882 | 1899.775 | 21.4 | 307.846 | 1.82 | 1.6 | 30.7 | 36.791 | 1.7 | 61.49 | 0.571 | 0.0 | 2021 | 9 | 12 | Sunday | 81 | Sep | 3.59 | 0.84 | 0.0 | 0.0 |
| 116765 | ZWE | Africa | Zimbabwe | 2021-09-13 | 126399.0 | 130.0 | 104.000 | 4543.0 | 5.0 | 7.143 | 8375.137 | 8.614 | 6.891 | 301.017 | 0.331 | 0.473 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.000 | 0.0 | 0.000 | 0.000 | 0.0 | 0 | 4752356.0 | 2856655.0 | 1895701.0 | 0.0 | 0.0 | 41369.0 | 31.49 | 18.93 | 12.56 | 0.0 | 2741.0 | 0.00 | 15092171.0 | 42.729 | 19.6 | 2.822 | 1.882 | 1899.775 | 21.4 | 307.846 | 1.82 | 1.6 | 30.7 | 36.791 | 1.7 | 61.49 | 0.571 | 0.0 | 2021 | 9 | 13 | Monday | 82 | Sep | 3.59 | 0.84 | 19.0 | 13.0 |
| 116766 | ZWE | Africa | Zimbabwe | 2021-09-14 | 126817.0 | 418.0 | 145.857 | 4550.0 | 7.0 | 6.714 | 8402.833 | 27.696 | 9.664 | 301.481 | 0.464 | 0.445 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.000 | 0.0 | 0.000 | 0.000 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.00 | 15092171.0 | 42.729 | 19.6 | 2.822 | 1.882 | 1899.775 | 21.4 | 307.846 | 1.82 | 1.6 | 30.7 | 36.791 | 1.7 | 61.49 | 0.571 | 0.0 | 2021 | 9 | 14 | Tuesday | 82 | Sep | 3.59 | 0.84 | 0.0 | 0.0 |
114675 rows × 72 columns
# Country shown in the single-country charts.
country = 'Poland'
df_country = df.loc[df['location'] == country]

# Line chart of the fatality percentage over time for that country.
fig = px.line(
    df_country,
    x='date',
    y='fatality',
    labels={'date': 'Date', 'fatality':'Fatality[%]'},
    title=f'Fatality During Covid-19 Outbreak in {country}',
)
fig
# Only keep the days on which the one-dose percentage is non-zero.
df_vac = df_country[df_country['vacinated'] != 0]

# go.Scatter replaces go.Line: the latter is a deprecated plotly class that
# merely produces a plain dict, which plotly then treats as a scatter trace.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_vac['date'], y=df_vac['vacinated'],
                         name='People vacinated [%]'))
fig.add_trace(go.Scatter(x=df_vac['date'], y=df_vac['vacinated_fully'],
                         name='People vacinated fully [%]'))
fig.update_layout(
    title_text=f"Vacinations in {country}")
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Percentage ')
fig
# Highest value of both vaccination percentages per location, keeping only
# locations whose one-dose percentage stays below 100.
df_vacinated = (
    df.groupby('location')[['vacinated', 'vacinated_fully']]
    .max()
    .loc[lambda peaks: peaks['vacinated'] < 100]
)
# Dual-axis chart: hospitalised patients on the primary y-axis and the
# stringency index on the secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Only keep the days on which the hospital-patient figure is non-zero.
df_hosp = df_country[df_country['hosp_patients'] != 0]

# go.Scatter replaces go.Line: the latter is a deprecated plotly class that
# merely produces a plain dict, which plotly then treats as a scatter trace.
fig.add_trace(go.Scatter(x=df_hosp['date'], y=df_hosp['hosp_patients'],
                         name='Hospitialized Patients'), secondary_y=False)
fig.add_trace(go.Scatter(x=df_hosp['date'], y=df_hosp['stringency_index'],
                         name='Stringency index'), secondary_y=True)
fig.update_layout(
    title_text=f"Restriction in {country}")
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Stringency index', secondary_y=True,
                 title_font=dict(color='red'))
fig.update_yaxes(title_text='Hospitalized people', secondary_y=False,
                 title_font=dict(color='blue'))
fig.update_layout(showlegend=False)
fig
# Dual-axis chart: reproduction rate on the secondary y-axis and smoothed
# new cases on the primary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# go.Scatter replaces go.Line: the latter is a deprecated plotly class that
# merely produces a plain dict, which plotly then treats as a scatter trace.
fig.add_trace(go.Scatter(x=df_country['date'], y=df_country['reproduction_rate'],
                         name='Reproduction rate'), secondary_y=True)
fig.add_trace(go.Scatter(x=df_country['date'], y=df_country['new_cases_smoothed'],
                         name='New Cases'), secondary_y=False)
fig.update_layout(title_text=f'Reproduction Rate in {country}')
fig.update_yaxes(title_text='Reproduction Rate', secondary_y=True,
                 title_font=dict(color='blue'))
fig.update_yaxes(title_text='New Cases', secondary_y=False,
                 title_font=dict(color='red'))
# update_layout returns the figure, so as the last expression of a notebook
# cell this call also displays it.
fig.update_layout(showlegend=False)
# Dual-axis chart: infected share of the population on the secondary y-axis;
# smoothed new cases and smoothed new tests on the primary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# go.Scatter replaces go.Line: the latter is a deprecated plotly class that
# merely produces a plain dict, which plotly then treats as a scatter trace.
fig.add_trace(go.Scatter(x=df_country['date'], y=df_country['infected'],
                         name='% Infected Society'), secondary_y=True)
fig.add_trace(go.Scatter(x=df_country['date'], y=df_country['new_cases_smoothed'],
                         name='New Cases'), secondary_y=False)
fig.add_trace(go.Scatter(x=df_country['date'], y=df_country['new_tests_smoothed'],
                         name='New Tests'), secondary_y=False)
fig.update_layout(
    title_text=f"The course of the Covid-19 pandemic in {country}")
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='% <b>Infected</b>', secondary_y=True)
fig.update_yaxes(title_text='<b>Quantity</b>', secondary_y=False)
# The 17 locations with the highest fully-vaccinated percentage.
best_vacinated = df_vacinated.nlargest(17, 'vacinated_fully')
countries_vac = best_vacinated.index

# One bar series per vaccination status, both sharing the location axis.
fig = go.Figure(data=[
    go.Bar(
        x=countries_vac,
        y=best_vacinated['vacinated_fully'],
        name='With a full dose',
        marker_color='steelblue',
    ),
    go.Bar(
        x=countries_vac,
        y=best_vacinated['vacinated'],
        name='At least one dose',
        marker_color='lightskyblue',
    ),
])
fig.update_layout(
    title_text="Best vaccinating countries",
    legend_title_text='Percentage vaccinated %',
)
fig
Preparing the Countries DataFrame
# Per-country aggregates over the whole period.  numeric_only=True is passed
# explicitly: after the NaN -> 0 replacement the text columns hold a mix of
# strings and zeros, and pandas 2 no longer drops such columns silently
# when aggregating.
grouped_by_iso = df.groupby('iso_code')
countries_sum = grouped_by_iso.sum(numeric_only=True)
countries_mean = grouped_by_iso.mean(numeric_only=True)
countries_med = grouped_by_iso.median(numeric_only=True)

# These columns are removed from the summed frame.
countries_sum = countries_sum.drop(columns=[
    'day', 'month', 'year', 'total_deaths', 'total_cases_per_million',
    'total_deaths_per_million', 'reproduction_rate', 'extreme_poverty',
    'gdp_per_capita', 'aged_70_older', 'aged_65_older', 'median_age',
    'population_density', 'fatality', 'stringency_index', 'icu_patients',
    'icu_patients_per_million', 'population', 'handwashing_facilities',
    'human_development_index', 'people_fully_vaccinated_per_hundred',
    'life_expectancy',
])
iso = countries_sum.index.tolist()

# Last row of every iso_code.  Rows whose continent is 0 (a missing
# continent after the NaN -> 0 replacement) are discarded.  The copy makes
# this an independent frame, because new columns are assigned to it later.
countries_last = df.drop_duplicates(subset=['iso_code'], keep='last')
countries_last = countries_last[countries_last['continent'] != 0].copy()

columns = df.columns
continent = 'Europe'
How to use the chart below?
# Rows of the selected continent.
df_EU = df.loc[df['continent'] == continent]

# Smoothed new cases over time, one line per location, with a range slider
# under the x-axis.
fig = px.line(
    df_EU,
    x='date',
    y='new_cases_smoothed',
    color='location',
    title=f'Covid-19 New Cases in {continent}',
)
fig.update_yaxes(title_text='New Cases')
fig.update_xaxes(title_text='Date', rangeslider_visible=True)

# Animated bar chart with one frame per date (dates rendered as strings);
# bar height is the total case count and bar colour the fatality percentage.
animation_frame = df_EU['date'].astype(str)
fig = px.bar(
    x=df_EU['location'],
    y=df_EU['total_cases'],
    color=df_EU['fatality'],
    animation_frame=animation_frame,
    title=f'The course of a pandemic in {continent} - Color represents mortality[%]',
)
fig.update_yaxes(title_text='Total Cases')
fig.show()
# Numeric columns summed per date over all locations.  numeric_only=True is
# explicit because pandas 2 no longer drops the text columns silently.
df_sum_per_day = df.groupby("date").sum(numeric_only=True)

# Calendar labels are identical for every location on a given date, so one
# row per date is enough.  Merging against the full per-location frame would
# repeat each daily total once per location.
calendar_labels = df[['date', 'day_name', 'week_nr', 'month_name']].drop_duplicates(subset='date')
df_sum_per_day = pd.merge(df_sum_per_day.reset_index(),
                          calendar_labels,
                          on='date',
                          how='left')

# Heatmap of summed new cases by weekday name (x) and month name (y).
fig = go.Figure(data=go.Heatmap(
    z=df_sum_per_day.new_cases,
    x=df_sum_per_day.day_name,
    y=df_sum_per_day.month_name,
    colorscale='temps'))
fig.update_layout(title="Heatmap of New Cases during the COVID-19 pandemic")
fig.show()
Coordinates of each country
# Nominatim geocoder client from geopy; geolocate() calls its geocode()
# method to look up coordinates.
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent = 'mStachowicz')
def geolocate(country):
    """Look up the coordinates that the geocoder reports for a country name.

    Args:
        country (str): Country name passed to ``geolocator.geocode``.

    Returns:
        tuple: ``(latitude, longitude)`` on success, or ``(np.nan, np.nan)``
        when the lookup fails or yields no result, so the result can always
        be indexed with ``[0]`` and ``[1]``.
    """
    try:
        loc = geolocator.geocode(country)
    except Exception:
        # Best-effort lookup: one failed request must not abort a whole
        # DataFrame.apply run.  Unlike a bare ``except``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        loc = None

    if loc is None:
        print("Country not found!")
        return (np.nan, np.nan)

    # And return latitude and longitude
    return (loc.latitude, loc.longitude)
# Geocode every location, then split the result into latitude and longitude.
countries_last['coordinates'] = countries_last['location'].apply(geolocate)

# Guard against entries that are not (lat, lon) tuples: a failed lookup can
# be stored as a bare NaN, and indexing a float would raise TypeError.
countries_last['lat'] = countries_last['coordinates'].apply(
    lambda coords: coords[0] if isinstance(coords, tuple) else np.nan)
countries_last['lon'] = countries_last['coordinates'].apply(
    lambda coords: coords[1] if isinstance(coords, tuple) else np.nan)
def human_format(num: float) -> str:
    """Format a number with two decimals and a magnitude suffix.

    The value is divided by 1000 until its absolute value is below 1000 or
    the largest suffix has been reached.

    Args:
        num: Number to format.

    Returns:
        The scaled number with two decimals, followed by one of
        '', 'K', 'M', 'G', 'T' or 'P'.
    """
    suffixes = ('', 'K', 'M', 'G', 'T', 'P')
    magnitude = 0
    # Stop at the last suffix: without this bound, values >= 1e18 would index
    # past the suffix list and an infinite value would never leave the loop.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    return f'{num:.2f}{suffixes[magnitude]}'
# Round fatality first, so the hover text built below always shows the
# two-decimal value (the rounding used to run after the text was built).
countries_last['fatality'] = countries_last['fatality'].round(2)

# Human-readable totals, e.g. 1500000 -> '1.50M'.
countries_last['total_cases_prettify'] = countries_last['total_cases'].map(human_format)
countries_last['total_deaths_prettify'] = countries_last['total_deaths'].map(human_format)

# Prepering markers
# HTML snippet per country, used as the marker text.
countries_last['summary'] = (
    '<b>Country: '
    + countries_last['location']
    + '</b><br><br>Confirmed: '
    + countries_last['total_cases_prettify']
    + '<br>Deaths: '
    + countries_last['total_deaths_prettify']
    + '<br>Fatality: '
    + countries_last['fatality'].astype(str)
    + "%")
# Geographic scatter: one marker per country, placed at its coordinates,
# sized by total cases / 200000 and coloured by fatality.
fig = go.Figure(
    go.Scattergeo(
        lat=countries_last['lat'],
        lon=countries_last['lon'],
        text=countries_last['summary'],
        marker={
            'size': countries_last['total_cases'] / 200000,
            'color': countries_last['fatality'],
            'opacity': 0.8,
        },
    )
)
fig.update_layout(
    title_text='Current status of the epidemic - color represents fatality'
)
fig.update_geos(showcountries=True)
# Choropleth of the mean fatality per ISO-3 country code.
fig = go.Figure()
fig.add_trace(
    go.Choropleth(
        locationmode='ISO-3',
        locations=iso,
        z=countries_mean['fatality'],
        colorscale="sunset",
    )
)
fig.update_layout(
    title_text='Mean fatality During Covid-19 outbreak',
    legend_title_text="Fatality[%]",
)
# Animated world map of total cases with one frame per date (dates rendered
# as strings); the colour range is fixed to 0..30,000,000.
animation_frame = df['date'].astype(str)
px.choropleth(
    data_frame=df,
    locations='iso_code',
    locationmode='ISO-3',
    color='total_cases',
    color_continuous_scale="dense",
    range_color=(0, 30000000),
    animation_frame=animation_frame,
    animation_group='location',
    title="Course of the Covid-19 epidemic",
)
# Saving database to excel
countries_last.to_excel('countries_database.xlsx')

# Countries whose total death count is 0 are left out of the charts below.
countries_plot = countries_last.loc[lambda rows: rows['total_deaths'] != 0]
countries_plot

# Sunburst: continent -> country, sector size = total deaths,
# colour = hospital beds per thousand.
fig = px.sunburst(
    countries_plot,
    path=['continent', 'location'],
    values='total_deaths',
    color='hospital_beds_per_thousand',
    color_continuous_scale=["red", "yellow", "green"],
    hover_data=['iso_code'],
    title='Distribution of all deaths',
)
fig.show()

# Treemap: world -> continent -> country, tile size = total deaths,
# colour = the aged_65_older column.
fig = px.treemap(
    countries_plot,
    path=[px.Constant('world'), 'continent', 'location'],
    values='total_deaths',
    color='aged_65_older',
    hover_data=['total_deaths'],
    title='Total Deaths depend on % people over 65 years old',
)
fig.show()
What are deaths correlated with?
# Covariance matrix of the numeric columns only.  The frame also holds text
# columns, which pandas 2 no longer drops silently in DataFrame.cov();
# select_dtypes works the same way on older and newer pandas versions.
# NOTE(review): covariance depends on the scale of each column, so the
# ranking below favours large-valued columns - consider .corr() if a
# scale-free measure is wanted.
df_cov = countries_plot.select_dtypes(include='number').cov()
df_cov
# The 15 columns with the largest covariance with total_deaths.
df_cov['total_deaths'].nlargest(15)
population 5.366915e+12 total_tests 1.293948e+12 total_cases 3.198286e+11 total_deaths 6.366684e+09 new_tests_smoothed 3.835412e+09 new_tests 3.457396e+09 new_cases_smoothed 7.163308e+08 total_cases_per_million 6.597328e+08 new_cases 6.562804e+08 gdp_per_capita 1.528388e+08 total_deaths_per_million 2.577867e+07 new_deaths 1.069128e+07 new_deaths_smoothed 1.058307e+07 total_tests_per_thousand 7.443680e+05 new_cases_smoothed_per_million 1.402597e+05 Name: total_deaths, dtype: float64